In [6]:

    
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt

Pandas

pandas is an open source, BSD-licensed library providing high-performance, easy-to-use data structures and data analysis tools for the Python programming language.

Pandas Website

This tutorial pulls from the Pandas website and the Handson-ML tutorial: https://github.com/ageron/handson-ml and the Pandas's documentation tutorial

Basic Data Structures: Series and Objects

See: https://pandas.pydata.org/pandas-docs/stable/dsintro.html

Series are a 1D array or a single column vector



In [20]:

    
v = pd.Series(np.random.randn(5))
v









    Out[20]:





0    0.688002
1    0.046689
2    0.481630
3    1.102089
4    1.399440
dtype: float64

Index is defaulted to start at 0



In [5]:

    
index = [1,2,3,4,5]
letters = ['a', 'b', 'c', 'd', 'e']
v = pd.Series(letters, index=index)
v









    Out[5]:





1    a
2    b
3    c
4    d
5    e
dtype: object



In [21]:

    
type(v)









    Out[21]:





pandas.core.series.Series



In [22]:

    
d = {'a' : 0., 'b' : 1., 'c' : 2.} # dictionary object
d









    Out[22]:





{'a': 0.0, 'b': 1.0, 'c': 2.0}



In [23]:

    
d_v = pd.Series(d)  
d_v









    Out[23]:





a    0.0
b    1.0
c    2.0
dtype: float64

Convert dictionary to series, and set the key as the index and the value as the data point



In [24]:

    
d_v['b'] # by index value









    Out[24]:





1.0



In [25]:

    
d_v[2]   # by row position









    Out[25]:





2.0



In [26]:

    
pd.Series(d, index=['b', 'c', 'd', 'a'])









    Out[26]:





b    1.0
c    2.0
d    NaN
a    0.0
dtype: float64



In [27]:

    
pd.Series(5., index=['a', 'b', 'c', 'd', 'e'])









    Out[27]:





a    5.0
b    5.0
c    5.0
d    5.0
e    5.0
dtype: float64



In [9]:

    
temperatures = [4.4,5.1,6.1,6.2,6.1,6.1,5.7,5.2,4.7,4.1,3.9,3.5]
s7 = pd.Series(temperatures, name="Temperature")
s7.plot()
plt.show()



In [10]:

    
dates = pd.date_range('2016/10/29 5:30pm', periods=12, freq='H')
dates









    Out[10]:





DatetimeIndex(['2016-10-29 17:30:00', '2016-10-29 18:30:00',
               '2016-10-29 19:30:00', '2016-10-29 20:30:00',
               '2016-10-29 21:30:00', '2016-10-29 22:30:00',
               '2016-10-29 23:30:00', '2016-10-30 00:30:00',
               '2016-10-30 01:30:00', '2016-10-30 02:30:00',
               '2016-10-30 03:30:00', '2016-10-30 04:30:00'],
              dtype='datetime64[ns]', freq='H')



In [11]:

    
temp_series = pd.Series(temperatures, dates)
temp_series  # where dates is the index









    Out[11]:





2016-10-29 17:30:00    4.4
2016-10-29 18:30:00    5.1
2016-10-29 19:30:00    6.1
2016-10-29 20:30:00    6.2
2016-10-29 21:30:00    6.1
2016-10-29 22:30:00    6.1
2016-10-29 23:30:00    5.7
2016-10-30 00:30:00    5.2
2016-10-30 01:30:00    4.7
2016-10-30 02:30:00    4.1
2016-10-30 03:30:00    3.9
2016-10-30 04:30:00    3.5
Freq: H, dtype: float64



In [12]:

    
temp_series.plot(kind="bar")

plt.grid(True)
plt.show()



In [13]:

    
temp_series.plot(kind="line")
plt.show()

Series Attributes



In [115]:

    
temp_series









    Out[115]:





2016-10-29 17:30:00    4.4
2016-10-29 18:30:00    5.1
2016-10-29 19:30:00    6.1
2016-10-29 20:30:00    6.2
2016-10-29 21:30:00    6.1
2016-10-29 22:30:00    6.1
2016-10-29 23:30:00    5.7
2016-10-30 00:30:00    5.2
2016-10-30 01:30:00    4.7
2016-10-30 02:30:00    4.1
2016-10-30 03:30:00    3.9
2016-10-30 04:30:00    3.5
Freq: H, dtype: float64



In [116]:

    
temp_series.shape









    Out[116]:





(12,)



In [126]:

    
temp_series.size









    Out[126]:





12



In [120]:

    
temp_series.dtype









    Out[120]:





dtype('float64')



In [121]:

    
temp_series.hasnans  # Does the series have NaN values?









    Out[121]:





False



In [122]:

    
temp_series.values









    Out[122]:





array([ 4.4,  5.1,  6.1,  6.2,  6.1,  6.1,  5.7,  5.2,  4.7,  4.1,  3.9,
        3.5])

Series Methods



In [130]:

    
ones = np.ones(temp_series.size)
temp_series.add(ones)









    Out[130]:





2016-10-29 17:30:00    5.4
2016-10-29 18:30:00    6.1
2016-10-29 19:30:00    7.1
2016-10-29 20:30:00    7.2
2016-10-29 21:30:00    7.1
2016-10-29 22:30:00    7.1
2016-10-29 23:30:00    6.7
2016-10-30 00:30:00    6.2
2016-10-30 01:30:00    5.7
2016-10-30 02:30:00    5.1
2016-10-30 03:30:00    4.9
2016-10-30 04:30:00    4.5
Freq: H, dtype: float64



In [131]:

    
def square(value):
    return value * value



In [132]:

    
temp_series.apply(square)  # apply method over all cells in a Series object









    Out[132]:





2016-10-29 17:30:00    19.36
2016-10-29 18:30:00    26.01
2016-10-29 19:30:00    37.21
2016-10-29 20:30:00    38.44
2016-10-29 21:30:00    37.21
2016-10-29 22:30:00    37.21
2016-10-29 23:30:00    32.49
2016-10-30 00:30:00    27.04
2016-10-30 01:30:00    22.09
2016-10-30 02:30:00    16.81
2016-10-30 03:30:00    15.21
2016-10-30 04:30:00    12.25
Freq: H, dtype: float64



In [136]:

    
temp_series.at_time('17:30')









    Out[136]:





2016-10-29 17:30:00    4.4
Freq: H, dtype: float64



In [140]:

    
temp_series.between_time(start_time='17:30', end_time='19:30')









    Out[140]:





2016-10-29 17:30:00    4.4
2016-10-29 18:30:00    5.1
2016-10-29 19:30:00    6.1
Freq: H, dtype: float64



In [141]:

    
temp_series.describe()









    Out[141]:





count    12.000000
mean      5.091667
std       0.964326
min       3.500000
25%       4.325000
50%       5.150000
75%       6.100000
max       6.200000
dtype: float64



In [143]:

    
temp_series.head(3)









    Out[143]:





2016-10-29 17:30:00    4.4
2016-10-29 18:30:00    5.1
2016-10-29 19:30:00    6.1
Freq: H, dtype: float64



In [150]:

    
for item in temp_series.items():
    print("Time: {}, Value: {}".format(item[0], item[1]))









    



Time: 2016-10-29 17:30:00, Value: 4.4
Time: 2016-10-29 18:30:00, Value: 5.1
Time: 2016-10-29 19:30:00, Value: 6.1
Time: 2016-10-29 20:30:00, Value: 6.2
Time: 2016-10-29 21:30:00, Value: 6.1
Time: 2016-10-29 22:30:00, Value: 6.1
Time: 2016-10-29 23:30:00, Value: 5.7
Time: 2016-10-30 00:30:00, Value: 5.2
Time: 2016-10-30 01:30:00, Value: 4.7
Time: 2016-10-30 02:30:00, Value: 4.1
Time: 2016-10-30 03:30:00, Value: 3.9
Time: 2016-10-30 04:30:00, Value: 3.5



In [151]:

    
temp_series.mode()









    Out[151]:





0    6.1
dtype: float64



In [152]:

    
temp_series.value_counts()









    Out[152]:





6.1    3
5.2    1
4.1    1
4.7    1
6.2    1
3.9    1
4.4    1
5.1    1
5.7    1
3.5    1
dtype: int64



In [153]:

    
temp_series.sort_values()









    Out[153]:





2016-10-30 04:30:00    3.5
2016-10-30 03:30:00    3.9
2016-10-30 02:30:00    4.1
2016-10-29 17:30:00    4.4
2016-10-30 01:30:00    4.7
2016-10-29 18:30:00    5.1
2016-10-30 00:30:00    5.2
2016-10-29 23:30:00    5.7
2016-10-29 19:30:00    6.1
2016-10-29 21:30:00    6.1
2016-10-29 22:30:00    6.1
2016-10-29 20:30:00    6.2
dtype: float64



In [154]:

    
temp_series.sort_index()









    Out[154]:





2016-10-29 17:30:00    4.4
2016-10-29 18:30:00    5.1
2016-10-29 19:30:00    6.1
2016-10-29 20:30:00    6.2
2016-10-29 21:30:00    6.1
2016-10-29 22:30:00    6.1
2016-10-29 23:30:00    5.7
2016-10-30 00:30:00    5.2
2016-10-30 01:30:00    4.7
2016-10-30 02:30:00    4.1
2016-10-30 03:30:00    3.9
2016-10-30 04:30:00    3.5
Freq: H, dtype: float64



In [155]:

    
temp_series.to_dict()









    Out[155]:





{Timestamp('2016-10-29 17:30:00', freq='H'): 4.4000000000000004,
 Timestamp('2016-10-29 18:30:00', freq='H'): 5.0999999999999996,
 Timestamp('2016-10-29 19:30:00', freq='H'): 6.0999999999999996,
 Timestamp('2016-10-29 20:30:00', freq='H'): 6.2000000000000002,
 Timestamp('2016-10-29 21:30:00', freq='H'): 6.0999999999999996,
 Timestamp('2016-10-29 22:30:00', freq='H'): 6.0999999999999996,
 Timestamp('2016-10-29 23:30:00', freq='H'): 5.7000000000000002,
 Timestamp('2016-10-30 00:30:00', freq='H'): 5.2000000000000002,
 Timestamp('2016-10-30 01:30:00', freq='H'): 4.7000000000000002,
 Timestamp('2016-10-30 02:30:00', freq='H'): 4.0999999999999996,
 Timestamp('2016-10-30 03:30:00', freq='H'): 3.8999999999999999,
 Timestamp('2016-10-30 04:30:00', freq='H'): 3.5}



In [160]:

    
temp_series.sample(frac=.25, random_state=42)  # return 25% of set









    Out[160]:





2016-10-30 03:30:00    3.9
2016-10-30 02:30:00    4.1
2016-10-29 17:30:00    4.4
dtype: float64

Dataframes

Dataframes are a 2-dimensional labeled data structure with columns of potentially different types



In [15]:

    
people_dict = {
    "weight": pd.Series([68, 83, 112], index=["alice", "bob", "charles"]),
    "birthyear": pd.Series([1984, 1985, 1992], index=["bob", "alice", "charles"], name="year"),
    "children": pd.Series([0, 3], index=["charles", "bob"]),
    "hobby": pd.Series(["Biking", "Dancing"], index=["alice", "bob"]),
}
people = pd.DataFrame(people_dict)
people









    Out[15]:







  
    
      
      birthyear
      children
      hobby
      weight
    
  
  
    
      alice
      1985
      NaN
      Biking
      68
    
    
      bob
      1984
      3.0
      Dancing
      83
    
    
      charles
      1992
      0.0
      NaN
      112

Transpose



In [16]:

    
people.T









    Out[16]:







  
    
      
      alice
      bob
      charles
    
  
  
    
      birthyear
      1985
      1984
      1992
    
    
      children
      NaN
      3
      0
    
    
      hobby
      Biking
      Dancing
      NaN
    
    
      weight
      68
      83
      112



In [17]:

    
people









    Out[17]:







  
    
      
      birthyear
      children
      hobby
      weight
    
  
  
    
      alice
      1985
      NaN
      Biking
      68
    
    
      bob
      1984
      3.0
      Dancing
      83
    
    
      charles
      1992
      0.0
      NaN
      112

Dataframe Attributes



In [161]:

    
people.head(3)









    Out[161]:







  
    
      
      birthyear
      height
      children
      hobby
      weight
      pets
    
  
  
    
      alice
      1985
      172
      NaN
      Biking
      68
      NaN
    
    
      bob
      1984
      181
      3.0
      Dancing
      83
      0.0
    
    
      charles
      1992
      185
      0.0
      NaN
      112
      5.0



In [162]:

    
people.tail(2)









    Out[162]:







  
    
      
      birthyear
      height
      children
      hobby
      weight
      pets
    
  
  
    
      bob
      1984
      181
      3.0
      Dancing
      83
      0.0
    
    
      charles
      1992
      185
      0.0
      NaN
      112
      5.0



In [163]:

    
people.T









    Out[163]:







  
    
      
      alice
      bob
      charles
    
  
  
    
      birthyear
      1985
      1984
      1992
    
    
      height
      172
      181
      185
    
    
      children
      NaN
      3
      0
    
    
      hobby
      Biking
      Dancing
      NaN
    
    
      weight
      68
      83
      112
    
    
      pets
      NaN
      0
      5



In [164]:

    
people.shape









    Out[164]:





(3, 6)

Dataframe Methods



In [166]:

    
people.corr()



In [171]:

    
import seaborn as sns
import matplotlib
%matplotlib inline



In [172]:

    
sns.heatmap(people.corr())









    Out[172]:





<matplotlib.axes._subplots.AxesSubplot at 0x112082358>

Selecting specific rows, columns, or cells

Rows

Referencing a named index

loc is used for label-based indexing



In [24]:

    
people.loc['charles']









    Out[24]:





birthyear    1992
children        0
hobby         NaN
weight        112
Name: charles, dtype: object

Referencing a row number

iloc is used for position-based indexing



In [25]:

    
people.iloc[2,]









    Out[25]:





birthyear    1992
children        0
hobby         NaN
weight        112
Name: charles, dtype: object



In [26]:

    
people.iloc[2:,]









    Out[26]:







  
    
      
      birthyear
      children
      hobby
      weight
    
  
  
    
      charles
      1992
      0.0
      NaN
      112



In [42]:

    
people['charles'] # error









    



---------------------------------------------------------------
KeyError                      Traceback (most recent call last)
/Users/carrie/anaconda/envs/mlbook/lib/python3.5/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   2441             try:
-> 2442                 return self._engine.get_loc(key)
   2443             except KeyError:

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas/_libs/index.c:5280)()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas/_libs/index.c:5126)()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas/_libs/hashtable.c:20523)()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas/_libs/hashtable.c:20477)()

KeyError: 'charles'

During handling of the above exception, another exception occurred:

KeyError                      Traceback (most recent call last)
<ipython-input-42-1c52b1a359c6> in <module>()
----> 1 people['charles'] # error

/Users/carrie/anaconda/envs/mlbook/lib/python3.5/site-packages/pandas/core/frame.py in __getitem__(self, key)
   1962             return self._getitem_multilevel(key)
   1963         else:
-> 1964             return self._getitem_column(key)
   1965 
   1966     def _getitem_column(self, key):

/Users/carrie/anaconda/envs/mlbook/lib/python3.5/site-packages/pandas/core/frame.py in _getitem_column(self, key)
   1969         # get column
   1970         if self.columns.is_unique:
-> 1971             return self._get_item_cache(key)
   1972 
   1973         # duplicate columns & possible reduce dimensionality

/Users/carrie/anaconda/envs/mlbook/lib/python3.5/site-packages/pandas/core/generic.py in _get_item_cache(self, item)
   1643         res = cache.get(item)
   1644         if res is None:
-> 1645             values = self._data.get(item)
   1646             res = self._box_item_values(item, values)
   1647             cache[item] = res

/Users/carrie/anaconda/envs/mlbook/lib/python3.5/site-packages/pandas/core/internals.py in get(self, item, fastpath)
   3588 
   3589             if not isnull(item):
-> 3590                 loc = self.items.get_loc(item)
   3591             else:
   3592                 indexer = np.arange(len(self.items))[isnull(self.items)]

/Users/carrie/anaconda/envs/mlbook/lib/python3.5/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   2442                 return self._engine.get_loc(key)
   2443             except KeyError:
-> 2444                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   2445 
   2446         indexer = self.get_indexer([key], method=method, tolerance=tolerance)

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas/_libs/index.c:5280)()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas/_libs/index.c:5126)()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas/_libs/hashtable.c:20523)()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas/_libs/hashtable.c:20477)()

KeyError: 'charles'

Columns

Referencing a named index



In [40]:

    
people[['weight']]



In [57]:

    
people.loc[:,'weight']









    Out[57]:





alice       68
bob         83
charles    112
Name: weight, dtype: int64

Referencing a row number



In [43]:

    
people.iloc[:,3]  # 0-index based









    Out[43]:





alice       68
bob         83
charles    112
Name: weight, dtype: int64



In [53]:

    
people.ix[3]









    



----------------------------------
IndexErrorTraceback (most recent call last)
<ipython-input-53-a5b01185c5f3> in <module>()
----> 1 people.ix[3]

/Users/carrie/anaconda/lib/python3.5/site-packages/pandas/core/indexing.py in __getitem__(self, key)
    119         else:
    120             key = com._apply_if_callable(key, self.obj)
--> 121             return self._getitem_axis(key, axis=0)
    122 
    123     def _get_label(self, label, axis=0):

/Users/carrie/anaconda/lib/python3.5/site-packages/pandas/core/indexing.py in _getitem_axis(self, key, axis)
   1073                 # this is the fallback! (for a non-float, non-integer index)
   1074                 if not labels.is_floating() and not labels.is_integer():
-> 1075                     return self._get_loc(key, axis=axis)
   1076 
   1077             return self._get_label(key, axis=axis)

/Users/carrie/anaconda/lib/python3.5/site-packages/pandas/core/indexing.py in _get_loc(self, key, axis)
    137 
    138     def _get_loc(self, key, axis=0):
--> 139         return self.obj._ixs(key, axis=axis)
    140 
    141     def _slice(self, obj, axis=0, kind=None):

/Users/carrie/anaconda/lib/python3.5/site-packages/pandas/core/frame.py in _ixs(self, i, axis)
   1881                 return self[i]
   1882             else:
-> 1883                 label = self.index[i]
   1884                 if isinstance(label, Index):
   1885                     # a location index by definition

/Users/carrie/anaconda/lib/python3.5/site-packages/pandas/core/indexes/base.py in __getitem__(self, key)
   1687 
   1688         if is_scalar(key):
-> 1689             return getitem(key)
   1690 
   1691         if isinstance(key, slice):

IndexError: index 3 is out of bounds for axis 0 with size 3



In [67]:

    
people.iloc[:,3]['alice']  # 0-index based









    Out[67]:





68

Specific Cells



In [60]:

    
people









    Out[60]:







  
    
      
      birthyear
      children
      hobby
      weight
    
  
  
    
      alice
      1985
      NaN
      Biking
      68
    
    
      bob
      1984
      3.0
      Dancing
      83
    
    
      charles
      1992
      0.0
      NaN
      112



In [62]:

    
people.iloc[1,0]  # Unnamed index, column









    Out[62]:





1984



In [63]:

    
people.loc['bob', 'birthyear'] # Named index, column









    Out[63]:





1984



In [68]:

    
people.iloc[:,0]['bob'] # Named index, unnamed column









    Out[68]:





1984



In [70]:

    
people.loc['bob', :][0] # Named index, unnamed column









    Out[70]:





1984



In [77]:

    
people.iloc[1,:]['birthyear'] # Unnamed index, named column









    Out[77]:





1984



In [79]:

    
people.loc[:,'birthyear'][1] # Unnamed index, named column









    Out[79]:





1984



In [71]:

    
people.iloc[:,0][1] # Unnamed index, unnamed column









    Out[71]:





1984



In [72]:

    
people.loc['bob', :]['birthyear'] # Named index, named column









    Out[72]:





1984

Slicing



In [45]:

    
people.iloc[1:3] # return slice of rows, from 2-3









    Out[45]:







  
    
      
      birthyear
      children
      hobby
      weight
    
  
  
    
      bob
      1984
      3.0
      Dancing
      83
    
    
      charles
      1992
      0.0
      NaN
      112



In [46]:

    
people[people["birthyear"] < 1990]









    Out[46]:







  
    
      
      birthyear
      children
      hobby
      weight
    
  
  
    
      alice
      1985
      NaN
      Biking
      68
    
    
      bob
      1984
      3.0
      Dancing
      83

people["age"] = 2016 - people["birthyear"] # adds a new column "age" people["over 30"] = people["age"] > 30 # adds another column "over 30" birthyears = people.pop("birthyear") del people["children"]

people



In [48]:

    
birthyears









    Out[48]:





alice      1985
bob        1984
charles    1992
Name: birthyear, dtype: int64



In [84]:

    
people["pets"] = pd.Series({"bob": 0, "charles": 5, "eugene":1})  # alice is missing, eugene is ignored
people









    Out[84]:







  
    
      
      birthyear
      children
      hobby
      weight
      pets
    
  
  
    
      alice
      1985
      NaN
      Biking
      68
      NaN
    
    
      bob
      1984
      3.0
      Dancing
      83
      0.0
    
    
      charles
      1992
      0.0
      NaN
      112
      5.0



In [85]:

    
people.insert(1, "height", [172, 181, 185])
people









    Out[85]:







  
    
      
      birthyear
      height
      children
      hobby
      weight
      pets
    
  
  
    
      alice
      1985
      172
      NaN
      Biking
      68
      NaN
    
    
      bob
      1984
      181
      3.0
      Dancing
      83
      0.0
    
    
      charles
      1992
      185
      0.0
      NaN
      112
      5.0

Auxillary Methods

Plotting



In [86]:

    
people.plot(kind = "scatter", x = "height", y = "weight", s=[40, 120, 200])
plt.show()



In [ ]:



In [87]:

    
people.assign(
    body_mass_index = people["weight"] / (people["height"] / 100) ** 2,
    has_pets = people["pets"] > 0
)









    Out[87]:







  
    
      
      birthyear
      height
      children
      hobby
      weight
      pets
      body_mass_index
      has_pets
    
  
  
    
      alice
      1985
      172
      NaN
      Biking
      68
      NaN
      22.985398
      False
    
    
      bob
      1984
      181
      3.0
      Dancing
      83
      0.0
      25.335002
      False
    
    
      charles
      1992
      185
      0.0
      NaN
      112
      5.0
      32.724617
      True



In [106]:

    
# Let's look at people again,
people









    Out[106]:







  
    
      
      birthyear
      height
      children
      hobby
      weight
      pets
    
  
  
    
      alice
      1985
      172
      NaN
      Biking
      68
      NaN
    
    
      bob
      1984
      181
      3.0
      Dancing
      83
      0.0
    
    
      charles
      1992
      185
      0.0
      NaN
      112
      5.0



In [54]:

    
people.info()









    



<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, alice to charles
Data columns (total 6 columns):
hobby      2 non-null object
height     3 non-null int64
weight     3 non-null int64
age        3 non-null int64
over 30    3 non-null bool
pets       2 non-null float64
dtypes: bool(1), float64(1), int64(3), object(1)
memory usage: 227.0+ bytes

Descriptive Statistics



In [88]:

    
people.describe(include='all')









    Out[88]:







  
    
      
      birthyear
      height
      children
      hobby
      weight
      pets
    
  
  
    
      count
      3.000000
      3.000000
      2.00000
      2
      3.000000
      2.000000
    
    
      unique
      NaN
      NaN
      NaN
      2
      NaN
      NaN
    
    
      top
      NaN
      NaN
      NaN
      Biking
      NaN
      NaN
    
    
      freq
      NaN
      NaN
      NaN
      1
      NaN
      NaN
    
    
      mean
      1987.000000
      179.333333
      1.50000
      NaN
      87.666667
      2.500000
    
    
      std
      4.358899
      6.658328
      2.12132
      NaN
      22.368132
      3.535534
    
    
      min
      1984.000000
      172.000000
      0.00000
      NaN
      68.000000
      0.000000
    
    
      25%
      1984.500000
      176.500000
      0.75000
      NaN
      75.500000
      1.250000
    
    
      50%
      1985.000000
      181.000000
      1.50000
      NaN
      83.000000
      2.500000
    
    
      75%
      1988.500000
      183.000000
      2.25000
      NaN
      97.500000
      3.750000
    
    
      max
      1992.000000
      185.000000
      3.00000
      NaN
      112.000000
      5.000000



In [92]:

    
people.height.min()









    Out[92]:





172



In [94]:

    
people.children.max()









    Out[94]:





3.0

	alice	bob	charles
birthyear	1985	1984	1992
children	NaN	3	0
hobby	Biking	Dancing	NaN
weight	68	83	112

	birthyear	height	children	hobby	weight	pets
alice	1985	172	NaN	Biking	68	NaN
bob	1984	181	3.0	Dancing	83	0.0
charles	1992	185	0.0	NaN	112	5.0

	birthyear	height	children	weight	pets
birthyear	1.000000	0.654654	-1.0	0.897432	1.0
height	0.654654	1.000000	-1.0	0.920987	1.0
children	-1.000000	-1.000000	1.0	-1.000000	-1.0
weight	0.897432	0.920987	-1.0	1.000000	1.0
pets	1.000000	1.000000	-1.0	1.000000	1.0

	birthyear	height	children	hobby	weight	pets
count	3.000000	3.000000	2.00000	2	3.000000	2.000000
unique	NaN	NaN	NaN	2	NaN	NaN
top	NaN	NaN	NaN	Biking	NaN	NaN
freq	NaN	NaN	NaN	1	NaN	NaN
mean	1987.000000	179.333333	1.50000	NaN	87.666667	2.500000
std	4.358899	6.658328	2.12132	NaN	22.368132	3.535534
min	1984.000000	172.000000	0.00000	NaN	68.000000	0.000000
25%	1984.500000	176.500000	0.75000	NaN	75.500000	1.250000
50%	1985.000000	181.000000	1.50000	NaN	83.000000	2.500000
75%	1988.500000	183.000000	2.25000	NaN	97.500000	3.750000
max	1992.000000	185.000000	3.00000	NaN	112.000000	5.000000